from __future__ import print_function
import os.path
import pandas as pd
from IPython.display import Image,display
import dalmatian as dm
from IPython.core.display import HTML
from CCLE_postp_function import *
import sys
# Put the sibling JKBio checkout first on the import path so that
# `TerraFunction` (imported on the next line) can be resolved.
sys.path.insert(0, '../JKBio/')
import TerraFunction as terra
# autoreload mode 2: re-import edited modules before each cell is executed.
%load_ext autoreload
%autoreload 2
# rpy2's IPython extension provides the %%R cell magic used later on.
%load_ext rpy2.ipython
from taigapy import TaigaClient
# Taiga client handle; not used in the part of the notebook visible here.
tc = TaigaClient()
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
# First source workspace, labelled "ibm".
namespace1 = "broad-genomics-delivery"
workspace1 = "Getz_IBM_CellLines_Exomes"
source1 = "ibm"

# Second source workspace, labelled "ccle".
namespace2 = "broad-firecloud-ccle"
workspace2 = "CCLE_DepMap_WES"
source2 = "ccle"

# Reference workspace: every config fetch and submission below goes through
# its manager (`refwm`).
refnamespace = "broad-firecloud-ccle"
refworkspace = "DepMap_WES_CN_HG38"

# The release label is also used as the Terra sample-set id.
sample_set_id = "19Q3"
release = sample_set_id

wm1 = dm.WorkspaceManager(namespace1, workspace1)
wm2 = dm.WorkspaceManager(namespace2, workspace2)
refwm = dm.WorkspaceManager(refnamespace, refworkspace)

# NOTE(review): per the recorded output this uploads participants/samples and
# creates a sample set, presumably in the reference workspace -- confirm in
# CCLE_postp_function.createDatasetWithNewCellLines.
newsample = createDatasetWithNewCellLines(
    wm1, refwm, source1, sample_set_id, wm2, source2
)
> /Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py(32)createDatasetWithNewCellLines() -> refsamples = wto.get_samples() (Pdb) c
/Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py:32: UserWarning: Boolean Series key will be reindexed to match DataFrame index. refsamples = wto.get_samples() /Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py:32: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. refsamples = wto.get_samples()
uploading new samples Successfully imported 1581 participants. Updating many hound records. Switching to batch updates Hound executing batch upload of 2 records Successfully imported 1806 samples. Updating many hound records. Switching to batch updates Hound executing batch upload of 270902 records creating a sample set Successfully imported 1 sample sets: * 19Q3interim (7 samples)
# Names of the Terra method configurations that are fetched and run below.
bamtoubam_name = "BamToUnmappedRGBams_MC"
ubamtofilelist = "Generate_uBAM_File_List"
realign = "Realign_WES_GATK4"
# Fetch through a dedicated name constant: previously `bamtoubam` was rebound
# from the config name (str) to the config dict, so re-running the fetch
# passed a dict to get_config. `bamtoubam` still ends up holding the config
# dict, which is what the following cells expect.
bamtoubam = refwm.get_config(bamtoubam_name)
bamtoubam
{'deleted': False,
'inputs': {'BamToUnmappedRGBamsWf.input_bam': 'this.WES_bam',
'BamToUnmappedRGBamsWf.preemptible_tries': '3',
'BamToUnmappedRGBamsWf.ValidateSamFile.java_opt': '"-Xmx3000m"',
'BamToUnmappedRGBamsWf.picard_docker': '"broadinstitute/genomes-in-the-cloud:2.3.1-1504795437"',
'BamToUnmappedRGBamsWf.ref_fasta_index': 'workspace.ref_fasta_fai',
'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.disk_size': '400',
'BamToUnmappedRGBamsWf.SortBamByQueryname.mem_size': '"3500 MB"',
'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.mem_size': '"3000 MB"',
'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.output_dir': '"."',
'BamToUnmappedRGBamsWf.SortBamByQueryname.disk_size': '400',
'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.java_opt': '"-Xmx1200m"',
'BamToUnmappedRGBamsWf.picard_path': '"/usr/gitc/"',
'BamToUnmappedRGBamsWf.SortBamByQueryname.java_opt': '"-Xmx3000m"',
'BamToUnmappedRGBamsWf.ref_fasta': 'workspace.ref_fasta',
'BamToUnmappedRGBamsWf.ValidateSamFile.mem_size': '"3500 MB"',
'BamToUnmappedRGBamsWf.ValidateSamFile.disk_size': '400'},
'methodConfigVersion': 4,
'methodRepoMethod': {'methodName': 'BamToUnmappedRGBams',
'methodVersion': 3,
'methodNamespace': 'vdauwera',
'methodUri': 'agora://vdauwera/BamToUnmappedRGBams/3',
'sourceRepo': 'agora'},
'name': 'BamToUnmappedRGBams_MC',
'namespace': 'vdauwera',
'outputs': {'BamToUnmappedRGBamsWf.sortsam_out': 'this.readgroup_ubams',
'BamToUnmappedRGBamsWf.validatesam_out': 'this.ubam_validation_reports'},
'prerequisites': {},
'rootEntityType': 'sample'}
# Save the config back to the workspace, then launch it on the sample set,
# expanding the set into its member samples via the expression.
refwm.update_config(bamtoubam)
subid = refwm.create_submission(
    bamtoubam['name'],
    entity=sample_set_id,
    etype="sample_set",
    expression="this.samples",
)
Successfully updated configuration vdauwera/BamToUnmappedRGBams_MC Successfully created submission 8499ce99-8837-4fb5-97c3-bb75f3da9db8.
# Block until the BAM-to-uBAM submission launched above has finished.
# NOTE(review): the displayed return value (`[]` in the recorded run) is
# presumably the list of failed workflows -- confirm in TerraFunction.
terra.waitForSubmission(refwm, subid)
1.0 of jobs Succeeded in submission 0.
[]
# Fetch the config by its literal name, as the CNV and aggregation cells do.
# Passing the `ubamtofilelist` variable itself made this cell non-rerunnable:
# after the first run the variable holds the config dict, not the name.
ubamtofilelist = refwm.get_config("Generate_uBAM_File_List")
ubamtofilelist
{'deleted': False,
'inputs': {'ArrayToTxt_workflow.CreateTxt.array_of_files': 'this.readgroup_ubams',
'ArrayToTxt_workflow.CreateTxt.list_name': 'this.name'},
'methodConfigVersion': 2,
'methodRepoMethod': {'methodName': 'ArrayOfFilesToTxt',
'methodVersion': 1,
'methodNamespace': 'gkugener',
'methodUri': 'agora://gkugener/ArrayOfFilesToTxt/1',
'sourceRepo': 'agora'},
'name': 'Generate_uBAM_File_List',
'namespace': 'gkugener',
'outputs': {'ArrayToTxt_workflow.CreateTxt.file_list_name': 'this.unmapped_bams_list'},
'prerequisites': {},
'rootEntityType': 'sample'}
# Save the config back to the workspace, then launch it on the sample set,
# expanding the set into its member samples via the expression.
refwm.update_config(ubamtofilelist)
subid = refwm.create_submission(
    ubamtofilelist['name'],
    entity=sample_set_id,
    etype="sample_set",
    expression="this.samples",
)
Successfully updated configuration gkugener/Generate_uBAM_File_List Successfully created submission de547b06-05ed-48f5-8276-a9f263772300.
# Block until the uBAM file-list submission launched above has finished.
terra.waitForSubmission(refwm, subid)
1.0 of jobs Succeeded in submission 0.sion 0. 7 mn elapsed.
[]
# Fetch the config by its literal name, as the CNV and aggregation cells do.
# Passing the `realign` variable itself made this cell non-rerunnable: after
# the first run the variable holds the config dict, not the name.
realign = refwm.get_config("Realign_WES_GATK4")
realign
{'deleted': False,
'inputs': {'PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf': 'workspace.dbsnp_138',
'PreProcessingForVariantDiscovery_GATK4.gatk_launch_path': '"/gatk/"',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_amb': 'workspace.ref_bwa_amb',
'PreProcessingForVariantDiscovery_GATK4.agg_preemptible_tries': '3',
'PreProcessingForVariantDiscovery_GATK4.ref_fasta_index': 'workspace.ref_fasta_fai',
'PreProcessingForVariantDiscovery_GATK4.GetBwaVersion.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.unmapped_bam_suffix': '".bam"',
'PreProcessingForVariantDiscovery_GATK4.ref_dict': 'workspace.ref_dict',
'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_sort': '"-Xms4000m"',
'PreProcessingForVariantDiscovery_GATK4.gotc_docker': '"broadinstitute/genomes-in-the-cloud:2.3.0-1501082129"',
'PreProcessingForVariantDiscovery_GATK4.picard_docker': '"broadinstitute/genomes-in-the-cloud:2.3.0-1501082129"',
'PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.java_opt': '"-Xms4000m"',
'PreProcessingForVariantDiscovery_GATK4.agg_medium_disk': '500',
'PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf_index': 'workspace.dbsnp_138_idx',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.mem_size': '"32 GB"',
'PreProcessingForVariantDiscovery_GATK4.picard_path': '"/usr/gitc/"',
'PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.java_opt': '"-Xms3000m"',
'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_fix': '"-Xms500m"',
'PreProcessingForVariantDiscovery_GATK4.gatk_docker': '"broadinstitute/gatk:4.beta.3"',
'PreProcessingForVariantDiscovery_GATK4.flowcell_unmapped_bams_list': 'this.unmapped_bams_list',
'PreProcessingForVariantDiscovery_GATK4.sample_name': 'this.sample_id',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_alt': 'workspace.ref_bwa_alt',
'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.mem_size': '"16 GB"',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu': '"16"',
'PreProcessingForVariantDiscovery_GATK4.agg_large_disk': '500',
'PreProcessingForVariantDiscovery_GATK4.agg_small_disk': '300',
'PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.java_opt': '"-Xms3000m"',
'PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.compression_level': '5',
'PreProcessingForVariantDiscovery_GATK4.CreateSequenceGroupingTSV.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.ref_name': 'workspace.ref_name',
'PreProcessingForVariantDiscovery_GATK4.bwa_commandline': '"bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta"',
'PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.java_opt': '"-Xms3000m"',
'PreProcessingForVariantDiscovery_GATK4.ref_fasta': 'workspace.ref_fasta',
'PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.flowcell_medium_disk': '300',
'PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.mem_size': '"16 GB"',
'PreProcessingForVariantDiscovery_GATK4.flowcell_small_disk': '300',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.java_opt': '"-Xms3000m"',
'PreProcessingForVariantDiscovery_GATK4.known_indels_sites_VCFs': 'workspace.known_indels_array',
'PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.known_indels_sites_indices': 'workspace.known_indels_idx_array',
'PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.mem_size': '"8 GB"',
'PreProcessingForVariantDiscovery_GATK4.preemptible_tries': '3',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_sa': 'workspace.ref_bwa_sa',
'PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.java_opt': '"-Xms2000m"',
'PreProcessingForVariantDiscovery_GATK4.python_docker': '"python:2.7"',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_ann': 'workspace.ref_bwa_ann',
'PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.java_opt': '"-Xms4000m"',
'PreProcessingForVariantDiscovery_GATK4.gotc_path': '"/usr/gitc/"',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_bwt': 'workspace.ref_bwa_bwt',
'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_pac': 'workspace.ref_bwa_pac'},
'methodConfigVersion': 8,
'methodRepoMethod': {'methodName': 'PreProcessingForVariantDiscovery_GATK4',
'methodVersion': 7,
'methodNamespace': 'gatk',
'methodUri': 'agora://gatk/PreProcessingForVariantDiscovery_GATK4/7',
'sourceRepo': 'agora'},
'name': 'Realign_WES_GATK4',
'namespace': 'gatk',
'outputs': {'PreProcessingForVariantDiscovery_GATK4.bqsr_report': 'this.hg38_bqsr_report',
'PreProcessingForVariantDiscovery_GATK4.duplication_metrics': 'this.hg38_duplication_metrics',
'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam_md5': 'this.hg38_analysis_ready_bam_md5',
'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam': 'this.hg38_analysis_ready_bam',
'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam_index': 'this.hg38_analysis_ready_bam_index'},
'prerequisites': {},
'rootEntityType': 'sample'}
# Save the config back to the workspace, then launch it on the sample set,
# expanding the set into its member samples via the expression.
refwm.update_config(realign)
subid = refwm.create_submission(
    realign['name'],
    entity=sample_set_id,
    etype="sample_set",
    expression="this.samples",
)
Successfully updated configuration gatk/Realign_WES_GATK4 Successfully created submission ff0fbd27-8a5d-4760-8110-c9b464d23b66.
# Block until the realignment submission launched above has finished.
# In the recorded run this call raised a requests ConnectionError (ETIMEDOUT)
# while fetching the submission status, about 99 minutes in. That is a
# client-side network failure, not a reported workflow failure.
# NOTE(review): re-running the cell presumably just resumes polling -- confirm.
terra.waitForSubmission(refwm, subid)
status is: Failed for 0 jobs in submission 0. 99 mn elapsed.
---------------------------------------------------------- OSError Traceback (most recent call last) /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 599 body=body, headers=headers, --> 600 chunked=chunked) 601 /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 383 # otherwise it looks like a programming error was the cause. --> 384 six.raise_from(e, None) 385 except (SocketTimeout, BaseSSLError, SocketError) as e: /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value) /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 379 try: --> 380 httplib_response = conn.getresponse() 381 except Exception as e: /anaconda3/envs/py36/lib/python3.6/http/client.py in getresponse(self) 1330 try: -> 1331 response.begin() 1332 except ConnectionError: /anaconda3/envs/py36/lib/python3.6/http/client.py in begin(self) 296 while True: --> 297 version, status, reason = self._read_status() 298 if status != CONTINUE: /anaconda3/envs/py36/lib/python3.6/http/client.py in _read_status(self) 257 def _read_status(self): --> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 259 if len(line) > _MAXLINE: /anaconda3/envs/py36/lib/python3.6/socket.py in readinto(self, b) 585 try: --> 586 return self._sock.recv_into(b) 587 except timeout: /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs) 301 else: --> 302 raise SocketError(str(e)) 303 except OpenSSL.SSL.ZeroReturnError: OSError: (60, 'ETIMEDOUT') During handling of the above exception, another exception occurred: ProtocolError Traceback (most 
recent call last) /anaconda3/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 448 retries=self.max_retries, --> 449 timeout=timeout 450 ) /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 637 retries = retries.increment(method, url, error=e, _pool=self, --> 638 _stacktrace=sys.exc_info()[2]) 639 retries.sleep() /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace) 367 if read is False or not self._is_method_retryable(method): --> 368 raise six.reraise(type(error), error, _stacktrace) 369 elif read is not None: /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb) 684 if value.__traceback__ is not tb: --> 685 raise value.with_traceback(tb) 686 raise value /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 599 body=body, headers=headers, --> 600 chunked=chunked) 601 /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 383 # otherwise it looks like a programming error was the cause. 
--> 384 six.raise_from(e, None) 385 except (SocketTimeout, BaseSSLError, SocketError) as e: /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value) /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 379 try: --> 380 httplib_response = conn.getresponse() 381 except Exception as e: /anaconda3/envs/py36/lib/python3.6/http/client.py in getresponse(self) 1330 try: -> 1331 response.begin() 1332 except ConnectionError: /anaconda3/envs/py36/lib/python3.6/http/client.py in begin(self) 296 while True: --> 297 version, status, reason = self._read_status() 298 if status != CONTINUE: /anaconda3/envs/py36/lib/python3.6/http/client.py in _read_status(self) 257 def _read_status(self): --> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 259 if len(line) > _MAXLINE: /anaconda3/envs/py36/lib/python3.6/socket.py in readinto(self, b) 585 try: --> 586 return self._sock.recv_into(b) 587 except timeout: /anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs) 301 else: --> 302 raise SocketError(str(e)) 303 except OpenSSL.SSL.ZeroReturnError: ProtocolError: ('Connection aborted.', OSError("(60, 'ETIMEDOUT')",)) During handling of the above exception, another exception occurred: ConnectionError Traceback (most recent call last) <ipython-input-81-b8781f508f0e> in <module> ----> 1 terra.waitForSubmission(refwm, subid) ~/Documents/Projects/BroadInstitute/JKBio/TerraFunction.py in waitForSubmission(wm, submissions) 21 failed = 0 22 finished=True ---> 23 for wcount, i in enumerate(wm.get_submission(submission_id)["workflows"]): 24 if i['status'] not in {'Done', 'Aborted', 'Failed', 'Succeeded'}: 25 finished=False /anaconda3/envs/py36/lib/python3.6/site-packages/dalmatian/base.py in get_submission(self, submission_id) 456 def get_submission(self, submission_id): 457 """Get 
submission metadata""" --> 458 r = firecloud.api.get_submission(self.namespace, self.workspace, submission_id) 459 if r.status_code != 200: 460 raise APIException(r) /anaconda3/envs/py36/lib/python3.6/site-packages/firecloud/api.py in get_submission(namespace, workspace, submission_id) 1084 uri = "workspaces/{0}/{1}/submissions/{2}".format(namespace, 1085 workspace, submission_id) -> 1086 return __get(uri) 1087 1088 def get_workflow_metadata(namespace, workspace, submission_id, workflow_id): /anaconda3/envs/py36/lib/python3.6/site-packages/firecloud/api.py in __get(methcall, headers, root_url, **kwargs) 90 if not headers: 91 headers = _fiss_agent_header() ---> 92 r = __SESSION.get(urljoin(root_url, methcall), headers=headers, **kwargs) 93 if fcconfig.verbosity > 1: 94 print('FISSFC call: %s' % r.url, file=sys.stderr) /anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs) 544 545 kwargs.setdefault('allow_redirects', True) --> 546 return self.request('GET', url, **kwargs) 547 548 def options(self, url, **kwargs): /anaconda3/envs/py36/lib/python3.6/site-packages/dalmatian/wmanager.py in _firecloud_api_timeout_wrapper(*args, **kwargs) 73 **{ 74 **{'timeout': timeout_state.timeout}, ---> 75 **kwargs 76 } 77 ) /anaconda3/envs/py36/lib/python3.6/site-packages/google/auth/transport/requests.py in request(self, method, url, data, headers, **kwargs) 206 207 response = super(AuthorizedSession, self).request( --> 208 method, url, data=data, headers=request_headers, **kwargs) 209 210 # If the response indicated that the credentials needed to be /anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 531 } 532 send_kwargs.update(settings) --> 533 resp = self.send(prep, **send_kwargs) 534 535 return resp /anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in 
send(self, request, **kwargs) 644 645 # Send the request --> 646 r = adapter.send(request, **kwargs) 647 648 # Total elapsed time of the request (approximately) /anaconda3/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 496 497 except (ProtocolError, socket.error) as err: --> 498 raise ConnectionError(err, request=request) 499 500 except MaxRetryError as e: ConnectionError: ('Connection aborted.', OSError("(60, 'ETIMEDOUT')",))
# Fetch the copy-number workflow configuration from the reference workspace.
# NOTE(review): the variable is called "woXY" while the config is named
# "CNV_sample_XX" -- confirm which intervals / panel of normals are intended.
CNV_woXY = refwm.get_config("CNV_sample_XX")
CNV_woXY
{'deleted': False,
'inputs': {'CNVSomaticPairWorkflow.preemptible_attempts': '5',
'CNVSomaticPairWorkflow.oncotator_docker': '',
'CNVSomaticPairWorkflow.mem_gb_for_call_copy_ratio_segments': '',
'CNVSomaticPairWorkflow.num_smoothing_iterations_per_fit': '',
'CNVSomaticPairWorkflow.ModelSegmentsNormal.output_dir': '',
'CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.output_dir': '',
'CNVSomaticPairWorkflow.calling_copy_ratio_z_score_threshold': '',
'CNVSomaticPairWorkflow.minor_allele_fraction_prior_alpha': '',
'CNVSomaticPairWorkflow.ModelSegmentsTumor.output_dir': '',
'CNVSomaticPairWorkflow.gatk_docker': 'workspace.gatk_docker',
'CNVSomaticPairWorkflow.num_changepoints_penalty_factor': '',
'CNVSomaticPairWorkflow.common_sites': 'workspace.common_sites_hg38',
'CNVSomaticPairWorkflow.tumor_bam_idx': 'this.hg38_analysis_ready_bam_index',
'CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.cpu': '',
'CNVSomaticPairWorkflow.mem_gb_for_oncotator': '',
'CNVSomaticPairWorkflow.neutral_segment_copy_ratio_upper_bound': '',
'CNVSomaticPairWorkflow.minimum_base_quality': '',
'CNVSomaticPairWorkflow.mem_gb_for_denoise_read_counts': '',
'CNVSomaticPairWorkflow.genotyping_base_error_rate': '',
'CNVSomaticPairWorkflow.emergency_extra_disk': '',
'CNVSomaticPairWorkflow.ModelSegmentsNormal.cpu': '',
'CNVSomaticPairWorkflow.CallCopyRatioSegmentsTumor.cpu': '',
'CNVSomaticPairWorkflow.ModelSegmentsNormal.normal_allelic_counts': '',
'CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.output_dir': '',
'CNVSomaticPairWorkflow.ref_fasta_fai': 'workspace.ref_fasta_fai',
'CNVSomaticPairWorkflow.CollectCountsTumor.cpu': '',
'CNVSomaticPairWorkflow.kernel_approximation_dimension': '',
'CNVSomaticPairWorkflow.outlier_neutral_segment_copy_ratio_z_score_threshold': '',
'CNVSomaticPairWorkflow.kernel_variance_copy_ratio': '',
'CNVSomaticPairWorkflow.additional_args_for_oncotator': '',
'CNVSomaticPairWorkflow.format': '',
'CNVSomaticPairWorkflow.mem_gb_for_model_segments': '',
'CNVSomaticPairWorkflow.mem_gb_for_plotting': '',
'CNVSomaticPairWorkflow.min_total_allele_count': '',
'CNVSomaticPairWorkflow.ref_fasta': 'workspace.ref_fasta',
'CNVSomaticPairWorkflow.num_burn_in_allele_fraction': '',
'CNVSomaticPairWorkflow.smoothing_threshold_allele_fraction': '',
'CNVSomaticPairWorkflow.ModelSegmentsTumor.cpu': '',
'CNVSomaticPairWorkflow.mem_gb_for_collect_counts': '',
'CNVSomaticPairWorkflow.ref_fasta_dict': 'workspace.ref_dict',
'CNVSomaticPairWorkflow.minimum_contig_length': '',
'CNVSomaticPairWorkflow.smoothing_threshold_copy_ratio': '',
'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.output_dir': '',
'CNVSomaticPairWorkflow.tumor_bam': 'this.hg38_analysis_ready_bam',
'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.cpu': '',
'CNVSomaticPairWorkflow.normal_bam_idx': '',
'CNVSomaticPairWorkflow.CollectAllelicCountsNormal.cpu': '',
'CNVSomaticPairWorkflow.neutral_segment_copy_ratio_lower_bound': '',
'CNVSomaticPairWorkflow.num_samples_allele_fraction': '',
'CNVSomaticPairWorkflow.max_num_segments_per_chromosome': '',
'CNVSomaticPairWorkflow.blacklist_intervals': '',
'CNVSomaticPairWorkflow.kernel_scaling_allele_fraction': '',
'CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.cpu': '',
'CNVSomaticPairWorkflow.mem_gb_for_preprocess_intervals': '',
'CNVSomaticPairWorkflow.genotyping_homozygous_log_ratio_threshold': '',
'CNVSomaticPairWorkflow.max_num_smoothing_iterations': '',
'CNVSomaticPairWorkflow.PreprocessIntervals.cpu': '',
'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.output_dir': '',
'CNVSomaticPairWorkflow.is_run_oncotator': '',
'CNVSomaticPairWorkflow.padding': '250',
'CNVSomaticPairWorkflow.mem_gb_for_collect_allelic_counts': '',
'CNVSomaticPairWorkflow.normal_bam': '',
'CNVSomaticPairWorkflow.CollectCountsNormal.cpu': '',
'CNVSomaticPairWorkflow.num_burn_in_copy_ratio': '',
'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.cpu': '',
'CNVSomaticPairWorkflow.DenoiseReadCountsNormal.cpu': '',
'CNVSomaticPairWorkflow.CollectAllelicCountsTumor.cpu': '',
'CNVSomaticPairWorkflow.intervals': 'workspace.ice_xx_intervals_no_pad',
'CNVSomaticPairWorkflow.gatk4_jar_override': '',
'CNVSomaticPairWorkflow.CallCopyRatioSegmentsNormal.cpu': '',
'CNVSomaticPairWorkflow.DenoiseReadCountsTumor.cpu': '',
'CNVSomaticPairWorkflow.number_of_eigensamples': '',
'CNVSomaticPairWorkflow.window_sizes': '',
'CNVSomaticPairWorkflow.num_samples_copy_ratio': '',
'CNVSomaticPairWorkflow.read_count_pon': 'workspace.ice_pon_xx',
'CNVSomaticPairWorkflow.bin_length': '0',
'CNVSomaticPairWorkflow.kernel_variance_allele_fraction': ''},
'methodConfigVersion': 7,
'methodRepoMethod': {'methodName': 'CNV_Somatic_Pair_Workflow',
'methodVersion': 9,
'methodNamespace': 'gatk',
'methodUri': 'agora://gatk/CNV_Somatic_Pair_Workflow/9',
'sourceRepo': 'agora'},
'name': 'CNV_sample_XX',
'namespace': 'gatk',
'outputs': {'CNVSomaticPairWorkflow.copy_ratio_legacy_segments_normal': 'this.copy_ratio_legacy_segments_normal',
'CNVSomaticPairWorkflow.het_allelic_counts_normal': 'this.het_allelic_counts_normal',
'CNVSomaticPairWorkflow.allelic_counts_normal': 'this.allelic_counts_normal',
'CNVSomaticPairWorkflow.copy_ratio_parameters_normal': 'this.copy_ratio_parameters_normal',
'CNVSomaticPairWorkflow.allele_fraction_legacy_segments_normal': 'this.allele_fraction_legacy_segments_normal',
'CNVSomaticPairWorkflow.normal_het_allelic_counts_normal': 'this.normal_het_allelic_counts_normal',
'CNVSomaticPairWorkflow.allele_fraction_parameters_normal': 'this.allele_fraction_parameters_normal',
'CNVSomaticPairWorkflow.modeled_segments_begin_tumor': 'this.modeled_segments_begin_tumor',
'CNVSomaticPairWorkflow.copy_ratio_parameters_begin_normal': 'this.copy_ratio_parameters_begin_normal',
'CNVSomaticPairWorkflow.copy_ratio_parameters_tumor': 'this.copy_ratio_parameters_tumor',
'CNVSomaticPairWorkflow.denoised_MAD_normal': 'this.denoised_MAD_normal',
'CNVSomaticPairWorkflow.scaled_delta_MAD_tumor': 'this.scaled_delta_MAD_tumor',
'CNVSomaticPairWorkflow.het_allelic_counts_tumor': 'this.het_allelic_counts_tumor',
'CNVSomaticPairWorkflow.scaled_delta_MAD_normal': 'this.scaled_delta_MAD_normal',
'CNVSomaticPairWorkflow.standardized_MAD_tumor': 'this.standardized_MAD_tumor',
'CNVSomaticPairWorkflow.denoised_copy_ratios_plot_tumor': 'this.denoised_copy_ratios_plot_tumor',
'CNVSomaticPairWorkflow.copy_ratio_only_segments_tumor': 'this.copy_ratio_only_segments_tumor',
'CNVSomaticPairWorkflow.oncotated_called_gene_list_file_tumor': 'this.oncotated_called_gene_list_file_tumor',
'CNVSomaticPairWorkflow.denoised_copy_ratios_normal': 'this.denoised_copy_ratios_normal',
'CNVSomaticPairWorkflow.called_copy_ratio_segments_normal': 'this.called_copy_ratio_segments_normal',
'CNVSomaticPairWorkflow.denoised_copy_ratios_lim_4_plot_normal': 'this.denoised_copy_ratios_lim_4_plot_normal',
'CNVSomaticPairWorkflow.oncotated_called_file_tumor': 'this.oncotated_called_file_tumor',
'CNVSomaticPairWorkflow.allele_fraction_parameters_begin_normal': 'this.allele_fraction_parameters_begin_normal',
'CNVSomaticPairWorkflow.delta_MAD_normal': 'this.delta_MAD_normal',
'CNVSomaticPairWorkflow.denoised_MAD_tumor': 'this.denoised_MAD_tumor',
'CNVSomaticPairWorkflow.normal_het_allelic_counts_tumor': 'this.normal_het_allelic_counts_tumor',
'CNVSomaticPairWorkflow.copy_ratio_only_segments_normal': 'this.copy_ratio_only_segments_normal',
'CNVSomaticPairWorkflow.modeled_segments_begin_normal': 'this.modeled_segments_begin_normal',
'CNVSomaticPairWorkflow.denoised_copy_ratios_tumor': 'this.denoised_copy_ratios_tumor',
'CNVSomaticPairWorkflow.denoised_copy_ratios_lim_4_plot_tumor': 'this.denoised_copy_ratios_lim_4_plot_tumor',
'CNVSomaticPairWorkflow.allelic_counts_entity_id_normal': 'this.allelic_counts_entity_id_normal',
'CNVSomaticPairWorkflow.modeled_segments_tumor': 'this.modeled_segments_tumor',
'CNVSomaticPairWorkflow.allele_fraction_parameters_begin_tumor': 'this.allele_fraction_parameters_begin_tumor',
'CNVSomaticPairWorkflow.allelic_counts_tumor': 'this.allelic_counts_tumor',
'CNVSomaticPairWorkflow.read_counts_entity_id_tumor': 'this.read_counts_entity_id_tumor',
'CNVSomaticPairWorkflow.standardized_MAD_normal': 'this.standardized_MAD_normal',
'CNVSomaticPairWorkflow.delta_MAD_tumor': 'this.delta_MAD_tumor',
'CNVSomaticPairWorkflow.read_counts_normal': 'this.read_counts_normal',
'CNVSomaticPairWorkflow.read_counts_tumor': 'this.read_counts_tumor',
'CNVSomaticPairWorkflow.modeled_segments_plot_normal': 'this.modeled_segments_plot_normal',
'CNVSomaticPairWorkflow.modeled_segments_plot_tumor': 'this.modeled_segments_plot_tumor',
'CNVSomaticPairWorkflow.denoised_copy_ratios_plot_normal': 'this.denoised_copy_ratios_plot_normal',
'CNVSomaticPairWorkflow.modeled_segments_normal': 'this.modeled_segments_normal',
'CNVSomaticPairWorkflow.allele_fraction_legacy_segments_tumor': 'this.allele_fraction_legacy_segments_tumor',
'CNVSomaticPairWorkflow.allelic_counts_entity_id_tumor': 'this.allelic_counts_entity_id_tumor',
'CNVSomaticPairWorkflow.copy_ratio_legacy_segments_tumor': 'this.copy_ratio_legacy_segments_tumor',
'CNVSomaticPairWorkflow.standardized_copy_ratios_normal': 'this.standardized_copy_ratios_normal',
'CNVSomaticPairWorkflow.called_copy_ratio_segments_tumor': 'this.called_copy_ratio_segments_tumor',
'CNVSomaticPairWorkflow.read_counts_entity_id_normal': 'this.read_counts_entity_id_normal',
'CNVSomaticPairWorkflow.preprocessed_intervals': 'this.preprocessed_intervals',
'CNVSomaticPairWorkflow.standardized_copy_ratios_tumor': 'this.standardized_copy_ratios_tumor',
'CNVSomaticPairWorkflow.allele_fraction_parameters_tumor': 'this.allele_fraction_parameters_tumor',
'CNVSomaticPairWorkflow.copy_ratio_parameters_begin_tumor': 'this.copy_ratio_parameters_begin_tumor'},
'prerequisites': {},
'rootEntityType': 'sample'}
# Save the CNV config back to the workspace, then launch it on the sample
# set, expanding the set into its member samples via the expression.
refwm.update_config(CNV_woXY)
submission_id = refwm.create_submission(
    CNV_woXY['name'],
    entity=sample_set_id,
    etype='sample_set',
    expression='this.samples',
)
Successfully updated configuration gatk/CNV_sample_XX Successfully created submission 7bb83274-49d1-463c-9a4c-d120110fd42b.
# Block until the CNV submission launched above has finished.
terra.waitForSubmission(refwm,submission_id)
1.0 of jobs Succeeded in submission 0.sion 0. 70 mn elapsed.
[]
# Fetch the configuration that combines the per-sample called segment files
# of a sample set into a single file (see its inputs/outputs shown below).
aggregate = refwm.get_config("Aggregate_CN_seg_files")
aggregate
{'deleted': False,
'inputs': {'aggregate_CN_segments_wrkflw.aggregate_CN_segments.disk_space': '10',
'aggregate_CN_segments_wrkflw.aggregate_CN_segments.num_preempt': '5',
'aggregate_CN_segments_wrkflw.aggregate_CN_segments.aggregate_seg_files_script': 'workspace.cn_single_file_script',
'aggregate_CN_segments_wrkflw.aggregate_CN_segments.memory': '5',
'aggregate_CN_segments_wrkflw.aggregate_CN_segments.sample_set_id': 'this.name',
'aggregate_CN_segments_wrkflw.aggregate_CN_segments.sample_seg_files': 'this.samples.called_copy_ratio_segments_tumor'},
'methodConfigVersion': 4,
'methodRepoMethod': {'methodName': 'Aggregate_CN_seg_files',
'methodVersion': 2,
'methodNamespace': 'gkugener',
'methodUri': 'agora://gkugener/Aggregate_CN_seg_files/2',
'sourceRepo': 'agora'},
'name': 'Aggregate_CN_seg_files',
'namespace': 'gkugener',
'outputs': {'aggregate_CN_segments_wrkflw.aggregate_CN_segments.combined_cn_file': 'this.combined_seg_file'},
'prerequisites': {},
'rootEntityType': 'sample_set'}
# Launch the aggregation directly on the sample set. Unlike the earlier
# steps there is no update_config call first, and only `entity` is passed:
# the fetched config already has rootEntityType 'sample_set'.
submission_id = refwm.create_submission(aggregate['name'],entity=sample_set_id)
Successfully created submission 5c76b219-404f-479a-9b7c-7cf5699fc161.
# Block until the aggregation submission launched above has finished.
terra.waitForSubmission(refwm,submission_id)
1.0 of jobs Succeeded in submission 0.sion 0. 1 mn elapsed.
[]
# Read the sample set's `combined_seg_file` attribute, which the aggregation
# config maps its combined output to; the value is a gs:// object path.
aggregated = refwm.get_entities('sample_set').loc[sample_set_id]["combined_seg_file"]
aggregated
'gs://fc-secure-d2a2d895-a7af-4117-bdc7-652d7d268324/5c76b219-404f-479a-9b7c-7cf5699fc161/aggregate_CN_segments_wrkflw/c0119f63-73d2-4f47-a6ec-88f24f74f3f2/call-aggregate_CN_segments/19Q3.called.seg'
# Copy the aggregated segment file to the local working directory; IPython
# substitutes the Python variable `aggregated` for `$aggregated`.
# NOTE(review): assumes a local `temp/` directory already exists -- confirm.
! gsutil cp $aggregated "temp/cnv_ccle.called.seg"
Updates are available for some Cloud SDK components. To install them, please run: $ gcloud components update Copying gs://fc-secure-d2a2d895-a7af-4117-bdc7-652d7d268324/5c76b219-404f-479a-9b7c-7cf5699fc161/aggregate_CN_segments_wrkflw/c0119f63-73d2-4f47-a6ec-88f24f74f3f2/call-aggregate_CN_segments/19Q3.called.seg... - [1 files][ 67.2 KiB/ 67.2 KiB] Operation completed over 1 objects/67.2 KiB.
%%R
# Load the shared R libraries and annotations for the R cells of this
# notebook. Per the recorded console output this attaches, among others,
# dplyr, the tidyverse, magrittr, reshape2, gridExtra, ggridges, GGally and
# plotly.
source('../JKBio/gkugener/RScripts/load_libraries_and_annotations.R')
2019-07-05 11:45:25::WARNING R[write to console]:
Attachement du package : ‘dplyr’
2019-07-05 11:45:25::WARNING R[write to console]: The following objects are masked from ‘package:plyr’:
arrange, count, desc, failwith, id, mutate, rename, summarise,
summarize
2019-07-05 11:45:25::WARNING R[write to console]: The following objects are masked from ‘package:stats’:
filter, lag
2019-07-05 11:45:25::WARNING R[write to console]: The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
2019-07-05 11:45:26::WARNING R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
2019-07-05 11:45:26::WARNING R[write to console]: ✔ ggplot2 3.2.0 ✔ readr 1.3.1
✔ tibble 2.1.3 ✔ purrr 0.3.2
✔ tidyr 0.8.3 ✔ stringr 1.4.0
✔ ggplot2 3.2.0 ✔ forcats 0.4.0
2019-07-05 11:45:26::WARNING R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::arrange() masks plyr::arrange()
✖ purrr::compact() masks plyr::compact()
✖ dplyr::count() masks plyr::count()
✖ dplyr::failwith() masks plyr::failwith()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::id() masks plyr::id()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::mutate() masks plyr::mutate()
✖ dplyr::rename() masks plyr::rename()
✖ dplyr::summarise() masks plyr::summarise()
✖ dplyr::summarize() masks plyr::summarize()
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘magrittr’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:purrr’:
set_names
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:tidyr’:
extract
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘reshape2’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:tidyr’:
smiths
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘gridExtra’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:dplyr’:
combine
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘ggridges’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:ggplot2’:
scale_discrete_manual
2019-07-05 11:45:26::WARNING R[write to console]: Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘GGally’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:dplyr’:
nasa
2019-07-05 11:45:26::WARNING R[write to console]:
Attachement du package : ‘plotly’
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:ggplot2’:
last_plot
2019-07-05 11:45:26::WARNING R[write to console]: The following objects are masked from ‘package:plyr’:
arrange, mutate, rename, summarise
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:stats’:
filter
2019-07-05 11:45:26::WARNING R[write to console]: The following object is masked from ‘package:graphics’:
layout
2019-07-05 11:45:26::WARNING R[write to console]: Le chargement a nécessité le package : grid
2019-07-05 11:45:26::WARNING R[write to console]: Le chargement a nécessité le package : futile.logger
2019-07-05 11:45:27::WARNING R[write to console]: Registering fonts with R
2019-07-05 11:45:27::WARNING R[write to console]:
Attachement du package : ‘cowplot’
2019-07-05 11:45:27::WARNING R[write to console]: The following object is masked from ‘package:ggthemes’:
theme_map
2019-07-05 11:45:27::WARNING R[write to console]: The following object is masked from ‘package:ggplot2’:
ggsave
2019-07-05 11:45:27::WARNING R[write to console]: Error in library(networkD3) :
aucun package nommé ‘networkD3’ n'est trouvé
Calls: <Anonymous> ... withVisible -> source -> withVisible -> eval -> eval -> library
Error in library(networkD3) : aucun package nommé ‘networkD3’ n'est trouvé Calls: <Anonymous> ... withVisible -> source -> withVisible -> eval -> eval -> library
%%R
# Helper functions used by the post-processing cells.
source("CCLE_postp_function.R")

# Parameters of this run.
release <- "19Q3"
genome_version <- "hg38"
hg38_cyto_band_reference <- "data/hg38_cytoband.gz"
# Local copy of the aggregated segment file for the new samples.
new_samples_copy_number_broad_wes <- "temp/cnv_ccle.called.seg"
%%R
# Copy number profiles of the previous release, segment level then gene level.
# The data.file names below have to be updated for every new release.
wes.priority.cn.seg.profiles <- dplyr::select(
  taigr::load.from.taiga(
    data.name = 'segmented-cn-wes-prioritzed-7fe1',
    data.file = 'wes.19Q3interim.segmented'
  ),
  DepMap_ID, Chromosome, Start, End, Num_Probes, Segment_Mean, Source
)
wes.priority.cn.gene.profiles <- taigr::load.from.taiga(
  data.name = 'segmented-cn-wes-prioritzed-7fe1',
  data.file = 'wes.19Q3interim.gene'
)
Fetching https://cds.team/taiga/api/datafile?format=metadata&dataset_permaname=segmented-cn-wes-prioritzed-7fe1&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:35::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Could not find segmented-cn-wes-prioritzed-7fe1 in cache, requesting from taiga... Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:35::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Taiga needs to convert data to rds before we can fetch it. Waiting... Conversion pending Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:36::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Running conversion Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:38::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:40::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:43::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented Status 200
2019-07-05 11:46:49::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8. 2019-07-05 11:46:49::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.segmented ...
|======================================================================| 100% Saving segmented-cn-wes-prioritzed-7fe1 in cache ( 08b499f2757143d8addd9c6c89be643e wes.19Q3interim.segmented )...
2019-07-05 11:46:54::WARNING R[write to console]: Saved to cache as 08b499f2757143d8addd9c6c89be643e_wes-19q3interim-segmented.rds
writing /Users/jeremie/.taiga/08b499f2757143d8addd9c6c89be643e_wes-19q3interim-segmented.idx /Users/jeremie/.taiga/segmented-cn-wes-prioritzed-7fe1_wes-19q3interim-segmented_28.idx Fetching https://cds.team/taiga/api/datafile?format=metadata&dataset_permaname=segmented-cn-wes-prioritzed-7fe1&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:46:54::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Could not find segmented-cn-wes-prioritzed-7fe1 in cache, requesting from taiga... Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:46:54::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Taiga needs to convert data to rds before we can fetch it. Waiting... Conversion pending Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:46:56::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Downloading from S3 Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:46:57::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:47:00::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Running conversion Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:47:03::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:47:08::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:47:16::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8.
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene Status 200
2019-07-05 11:47:26::WARNING R[write to console]: No encoding supplied: defaulting to UTF-8. 2019-07-05 11:47:26::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:47:47::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:49:28::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:49:43::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:49:57::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:50:11::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:50:24::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100%
2019-07-05 11:50:34::WARNING R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...
|======================================================================| 100% Saving segmented-cn-wes-prioritzed-7fe1 in cache ( 08b499f2757143d8addd9c6c89be643e wes.19Q3interim.gene )...
2019-07-05 11:51:04::WARNING R[write to console]: Saved to cache as 08b499f2757143d8addd9c6c89be643e_wes-19q3interim-gene.rds
writing /Users/jeremie/.taiga/08b499f2757143d8addd9c6c89be643e_wes-19q3interim-gene.idx /Users/jeremie/.taiga/segmented-cn-wes-prioritzed-7fe1_wes-19q3interim-gene_28.idx
%%R
# Build the segment table for the new samples by chaining the helper steps:
# processSegments -> filterForCCLE -> interpolateGapsInSegmented -> extendEndsOfSegments.
segments <- new_samples_copy_number_broad_wes %>%
  processSegments() %>%
  filterForCCLE()
# interpolateGapsInSegmented returns a list; only its `segs` element is kept.
segments <- interpolateGapsInSegmented(segments)$segs
segments <- segments %>%
  extendEndsOfSegments('../JKBio/data/hg38_cytoband.gz')
print(segments)
print(head(wes.priority.cn.seg.profiles))
# Combine the new segments with the previous release's segment profiles.
segments_unfiltered <- reprioritizeData(segments, wes.priority.cn.seg.profiles)
# Fill in the gaps on the entire dataset
# Extend start sites to 1, end sites to the end of the chromosome?
[1] 0
2019-07-05 12:10:13::WARNING R[write to console]: Parsed with column specification: cols( X1 = col_character(), X2 = col_double(), X3 = col_double(), X4 = col_character(), X5 = col_character() )
# A tibble: 1,276 x 7 DepMap_ID seqnames start end Num_Probes Segment_Mean Source <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> 1 ibm_ACH-000658 chr1 1 12785414 1710 0.880 Broad WES 2 ibm_ACH-000658 chr1 12785415 13084566 19 1.96 Broad WES 3 ibm_ACH-000658 chr1 13084567 16448617 336 0.902 Broad WES 4 ibm_ACH-000658 chr1 16448618 16976116 65 1.58 Broad WES 5 ibm_ACH-000658 chr1 16976117 17085179 50 0.791 Broad WES 6 ibm_ACH-000658 chr1 17085180 33327415 2124 0.891 Broad WES 7 ibm_ACH-000658 chr1 33327416 33608515 47 1.12 Broad WES 8 ibm_ACH-000658 chr1 33608516 85736977 3854 0.889 Broad WES 9 ibm_ACH-000658 chr1 85736978 86120112 54 1.22 Broad WES 10 ibm_ACH-000658 chr1 86120113 111627657 1569 0.895 Broad WES # … with 1,266 more rows DepMap_ID Chromosome Start End Num_Probes Segment_Mean Source 1 ACH-000001 1 1 1969745 286 2.546065 Sanger WES 2 ACH-000001 1 1969746 6354345 365 2.175759 Sanger WES 3 ACH-000001 1 6354346 6958256 100 3.109430 Sanger WES 4 ACH-000001 1 6958257 15977206 884 2.134831 Sanger WES 5 ACH-000001 1 15977207 16174774 57 2.952592 Sanger WES 6 ACH-000001 1 16174775 16275770 19 1.710129 Sanger WES # A tibble: 1,276 x 7 DepMap_ID Chromosome Start End Num_Probes Segment_Mean Source <chr> <fct> <dbl> <dbl> <dbl> <dbl> <chr> 1 ibm_ACH-000658 chr1 1 12785414 1710 0.880 Broad W… 2 ibm_ACH-000658 chr1 12785415 13084566 19 1.96 Broad W… 3 ibm_ACH-000658 chr1 13084567 16448617 336 0.902 Broad W… 4 ibm_ACH-000658 chr1 16448618 16976116 65 1.58 Broad W… 5 ibm_ACH-000658 chr1 16976117 17085179 50 0.791 Broad W… 6 ibm_ACH-000658 chr1 17085180 33327415 2124 0.891 Broad W… 7 ibm_ACH-000658 chr1 33327416 33608515 47 1.12 Broad W… 8 ibm_ACH-000658 chr1 33608516 85736977 3854 0.889 Broad W… 9 ibm_ACH-000658 chr1 85736978 86120112 54 1.22 Broad W… 10 ibm_ACH-000658 chr1 86120113 111627657 1569 0.895 Broad W… # … with 1,266 more rows
%%R
# Files holding the cell-line ids used by the filtering cells below.
blacklisted <- 'blacklist.txt'
embargoed <- 'WES_embargoed.txt'
%%R
# rmutil is still attached so the set of attached packages seen by later cells
# is unchanged.
library('rmutil')
# Show the blacklist entries. rmutil::read.list() scans numeric data only and
# stops with "scan() expected 'a real'" on ids such as 'ACH-001434', so the
# file is read as plain text lines instead.
readLines(blacklisted)
2019-07-05 12:29:09::WARNING R[write to console]: Registered S3 method overwritten by 'rmutil':
method from
print.response httr
2019-07-05 12:29:09::WARNING R[write to console]:
Attaching package: ‘rmutil’
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:AnnotationDbi’:
as.data.frame
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:IRanges’:
as.data.frame
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:S4Vectors’:
as.data.frame
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:Biobase’:
description
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:BiocGenerics’:
as.data.frame
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:stats4’:
nobs
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:tidyr’:
nesting
2019-07-05 12:29:09::WARNING R[write to console]: The following object is masked from ‘package:stats’:
nobs
2019-07-05 12:29:09::WARNING R[write to console]: The following objects are masked from ‘package:base’:
as.data.frame, units
2019-07-05 12:29:09::WARNING R[write to console]: Error in scan(file, skip = skip, nlines = nlines, quiet = TRUE) :
scan() attendait 'a real' et a reçu 'ACH-001434'
Calls: <Anonymous> ... <Anonymous> -> <Anonymous> -> withVisible -> read.list -> scan
Error in scan(file, skip = skip, nlines = nlines, quiet = TRUE) : scan() attendait 'a real' et a reçu 'ACH-001434' Calls: <Anonymous> ... <Anonymous> -> <Anonymous> -> withVisible -> read.list -> scan
%%R
# Quick preview of the merged segment table.
segments_unfiltered %>%
  corner() %>%
  head()
DepMap_ID Chromosome Start End Num_Probes 1 ACH-000001 1 1 1969745 286 2 ACH-000001 1 1969746 6354345 365 3 ACH-000001 1 6354346 6958256 100 4 ACH-000001 1 6958257 15977206 884 5 ACH-000001 1 15977207 16174774 57
%%R
# TODO: IF seqnames (CHR) are 1-9 values, append "chr" in front of each! (bulk)
# ifelse(grepl("[0-9]+", new_copy_number$ChrChromosome), "chr"+, "no")
# Apply each exclusion list to the merged segment table. Both calls start from
# segments_unfiltered, so the two results are independent of each other.
# NOTE(review): the recorded run parses each list file with its first id as the
# column name — confirm filterBlackListedLine does not drop that first entry.
segments_blacklisted <- filterBlackListedLine(filepath=blacklisted,segments_unfiltered)
segments_embargoed <- filterBlackListedLine(filepath=embargoed,segments_unfiltered)
2019-07-05 12:11:30::WARNING R[write to console]: Parsed with column specification: cols( `ACH-001434` = col_character() ) 2019-07-05 12:11:30::WARNING R[write to console]: Parsed with column specification: cols( `ACH-001279` = col_character() )
%%R
# Rename the coordinate columns to seqnames/start/end.
# The source column is `Chromosome` (singular); the previous `Chromosomes`
# made dplyr::rename() stop with "object 'Chromosomes' not found".
# Run this cell once only: after a successful rename the old names are gone.
segments_unfiltered <- dplyr::rename(segments_unfiltered, seqnames=Chromosome, start=Start, end=End)
2019-07-05 12:12:00::WARNING R[write to console]: Error in .f(.x[[i]], ...) : objet 'Chromosomes' introuvable Calls: <Anonymous> ... <Anonymous> -> vars_rename_eval -> map_if -> map -> .f
Error in .f(.x[[i]], ...) : objet 'Chromosomes' introuvable Calls: <Anonymous> ... <Anonymous> -> vars_rename_eval -> map_if -> map -> .f
%%R
# Save the merged segment table as a comma-separated file for this release.
write.table(
  segments_unfiltered,
  file = paste0("temp/wes.", release, ".segmented.cn"),
  sep = ",",
  quote = FALSE,
  row.names = FALSE
)
%%R
# Number of rows and columns of the merged segment table.
c(nrow(segments_unfiltered), ncol(segments_unfiltered))
[1] 327663 7
%%R
# What we upload to taiga
# TODO : change column name again
# Rename the coordinate columns of both filtered tables to seqnames/start/end.
# The source column is `Chromosome` (singular); `Chromosomes` does not exist in
# these tables and makes dplyr::rename() fail.
segments_blacklisted <- dplyr::rename(segments_blacklisted, seqnames=Chromosome, start=Start, end=End)
segments_embargoed <- dplyr::rename(segments_embargoed, seqnames=Chromosome, start=Start, end=End)
%%R
# Write the two filtered segment tables next to the unfiltered one.
# - The blacklisted file name was misspelled and missing its '.' separator
#   (it produced temp/wes.<release>balcklisted.segmented.cn).
# - row.names = F matches the unfiltered segment export: these tables carry no
#   meaningful row names, and writing them leaves the header one field short.
write.table(segments_blacklisted, file = paste0('temp/wes.',release,'.blacklisted.segmented.cn'), sep = ',', quote = F, row.names = F)
write.table(segments_embargoed, file = paste0('temp/wes.',release,'.embargoed.segmented.cn'), sep = ',', quote = F, row.names = F)
%%R
# Gene annotation table; the preview in the next cell shows the columns
# EGID, SYMBOL, CHR, CHRLOC and CHRLOCEND.
entrezgenes <- generateEntrezGenes()
%%R
# Quick preview of the gene annotation table.
entrezgenes %>%
  corner() %>%
  head()
EGID SYMBOL CHR CHRLOC CHRLOCEND 1 1 A1BG 19 58346805 58353499 2 10 NAT2 8 18391281 18401215 3 100 ADA 20 44619518 44651758 4 1000 CDH2 18 27950962 28177130 5 10000 AKT3 1 243488232 243843584
%%R
# Re-source the helper file so that edits made to it since the first load are
# picked up, then compute the gene-level result from the merged segments.
source("CCLE_postp_function.R")
res <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_unfiltered)
%%R
# Gene-level results for the two filtered segment tables.
res_embargoed <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_embargoed)
# The filtered table is called segments_blacklisted; the misspelled
# `segments_balcklisted` was never defined. The result keeps its original name
# because later cells read `res_balcklisted`.
res_balcklisted <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_blacklisted)
%%R
# Take the gene-level matrix out of the result list and preview it
# (the recorded output shows cell lines as rows and "SYMBOL (EGID)" columns).
genematrix_unfiltered <- res$gene_level_data_hg38
corner(genematrix_unfiltered)
A1BG (1) NAT2 (10) ADA (100) CDH2 (1000) AKT3 (10000) ACH-000001 1.265172 1.1415254 1.2958359 0.6627515 1.012791 ACH-000002 1.013633 0.9741737 1.0175121 1.4893275 1.012618 ACH-000003 1.031898 1.0958118 1.8768326 0.5364814 1.002498 ACH-000004 1.349290 1.0865089 0.7855092 1.0770610 1.086057 ACH-000005 1.222033 1.1622070 0.7612536 1.0724579 1.120937
# NOTE(review): no %%R header precedes these R statements in this export —
# confirm they are executed inside an R cell.
# Same extraction as for the unfiltered result, for the two filtered results.
genematrix_embargoed <- res_embargoed$gene_level_data_hg38
corner(genematrix_embargoed)
# The "balcklisted" spelling follows res_balcklisted, which is defined earlier.
genematrix_balcklisted <- res_balcklisted$gene_level_data_hg38
corner(genematrix_balcklisted)
%%R
# Write the three gene-level matrices. Row names are kept (row.names = T)
# because they hold the cell-line ids.
write.table(genematrix_unfiltered, file = paste0('temp/wes.',release,'.gene.cn'),
            sep = ',', quote = F, row.names = T)
# The matrix is defined upstream as `genematrix_balcklisted`; the previously
# referenced `genematrix_blacklisted` does not exist. The output file name is
# also corrected: it was misspelled and lacked the '.' after the release.
write.table(genematrix_balcklisted, file = paste0('temp/wes.', release,'.blacklisted.gene.cn'),
            sep = ',', quote = F, row.names = T)
write.table(genematrix_embargoed, file = paste0('temp/wes.', release,'.embargoed.gene.cn'),
            sep = ',', quote = F, row.names = T)
# Load the tab-separated segment file downloaded from the workspace and the
# comma-separated gene-level matrix written by the R cells, then show the
# segment table.
segmentcn = pd.read_csv('temp/cnv_ccle.called.seg', sep='\t')
genecn = pd.read_csv('temp/wes.' + release + '.gene.cn')
segmentcn
| Sample | CONTIG | START | END | NUM_POINTS_COPY_RATIO | MEAN_LOG2_COPY_RATIO | CALL | |
|---|---|---|---|---|---|---|---|
| 0 | ibm_ACH-000658 | chr1 | 785776 | 12777851 | 1710 | -0.183785 | - |
| 1 | ibm_ACH-000658 | chr1 | 12792977 | 13053864 | 19 | 0.970490 | + |
| 2 | ibm_ACH-000658 | chr1 | 13115269 | 16448393 | 336 | -0.149112 | 0 |
| 3 | ibm_ACH-000658 | chr1 | 16448842 | 16975982 | 65 | 0.662344 | + |
| 4 | ibm_ACH-000658 | chr1 | 16976250 | 17084089 | 50 | -0.338767 | - |
| 5 | ibm_ACH-000658 | chr1 | 17086270 | 33326211 | 2124 | -0.166508 | 0 |
| 6 | ibm_ACH-000658 | chr1 | 33328619 | 33606240 | 47 | 0.166090 | + |
| 7 | ibm_ACH-000658 | chr1 | 33610790 | 85736809 | 3854 | -0.170009 | 0 |
| 8 | ibm_ACH-000658 | chr1 | 85737145 | 86115630 | 54 | 0.287622 | + |
| 9 | ibm_ACH-000658 | chr1 | 86124594 | 111564204 | 1569 | -0.160833 | 0 |
| 10 | ibm_ACH-000658 | chr1 | 111691110 | 111739494 | 8 | -1.558503 | - |
| 11 | ibm_ACH-000658 | chr1 | 111755451 | 121568451 | 713 | -0.058538 | 0 |
| 12 | ibm_ACH-000658 | chr1 | 143880473 | 145784846 | 43 | 1.153831 | + |
| 13 | ibm_ACH-000658 | chr1 | 145788518 | 146020481 | 110 | 0.649050 | + |
| 14 | ibm_ACH-000658 | chr1 | 146069292 | 147162032 | 24 | 1.336794 | + |
| 15 | ibm_ACH-000658 | chr1 | 147162189 | 147993169 | 56 | 0.697550 | + |
| 16 | ibm_ACH-000658 | chr1 | 147995347 | 148149818 | 14 | 2.030036 | + |
| 17 | ibm_ACH-000658 | chr1 | 148482507 | 152315708 | 604 | 0.656568 | + |
| 18 | ibm_ACH-000658 | chr1 | 152350359 | 152412485 | 4 | 0.924756 | + |
| 19 | ibm_ACH-000658 | chr1 | 152511284 | 152885025 | 20 | 0.849826 | + |
| 20 | ibm_ACH-000658 | chr1 | 152909547 | 155033349 | 469 | 0.642168 | + |
| 21 | ibm_ACH-000658 | chr1 | 155033350 | 155034979 | 4 | 0.218625 | + |
| 22 | ibm_ACH-000658 | chr1 | 155040234 | 155235496 | 111 | 0.589880 | + |
| 23 | ibm_ACH-000658 | chr1 | 155235497 | 155237830 | 3 | 0.206812 | + |
| 24 | ibm_ACH-000658 | chr1 | 155237883 | 156311447 | 317 | 0.532224 | + |
| 25 | ibm_ACH-000658 | chr1 | 156311790 | 157544689 | 275 | 0.721402 | + |
| 26 | ibm_ACH-000658 | chr1 | 157544690 | 157698152 | 26 | 0.763880 | + |
| 27 | ibm_ACH-000658 | chr1 | 157698153 | 160176854 | 305 | 0.645724 | + |
| 28 | ibm_ACH-000658 | chr1 | 160177268 | 160186519 | 4 | 0.728144 | + |
| 29 | ibm_ACH-000658 | chr1 | 160186520 | 161368120 | 324 | 0.577146 | + |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1246 | ibm_ACH-002446 | chr19 | 43798204 | 45419449 | 330 | -0.062992 | 0 |
| 1247 | ibm_ACH-002446 | chr19 | 45420073 | 45687979 | 69 | -0.134149 | 0 |
| 1248 | ibm_ACH-002446 | chr19 | 45688136 | 48444189 | 487 | -0.055875 | 0 |
| 1249 | ibm_ACH-002446 | chr19 | 48445755 | 48446592 | 2 | 0.526846 | + |
| 1250 | ibm_ACH-002446 | chr19 | 48446593 | 48758472 | 93 | 0.017552 | 0 |
| 1251 | ibm_ACH-002446 | chr19 | 48795175 | 49024230 | 108 | -0.074870 | 0 |
| 1252 | ibm_ACH-002446 | chr19 | 49031845 | 49451189 | 155 | -0.108942 | 0 |
| 1253 | ibm_ACH-002446 | chr19 | 49451234 | 49453673 | 2 | -0.133832 | 0 |
| 1254 | ibm_ACH-002446 | chr19 | 49458235 | 51380384 | 574 | -0.116783 | 0 |
| 1255 | ibm_ACH-002446 | chr19 | 51380385 | 51411623 | 5 | -0.407410 | - |
| 1256 | ibm_ACH-002446 | chr19 | 51413461 | 51627790 | 44 | 0.029062 | 0 |
| 1257 | ibm_ACH-002446 | chr19 | 51643104 | 51693589 | 6 | -1.829497 | - |
| 1258 | ibm_ACH-002446 | chr19 | 51713176 | 54240058 | 366 | -0.076702 | 0 |
| 1259 | ibm_ACH-002446 | chr19 | 54240059 | 54242109 | 3 | -1.852959 | - |
| 1260 | ibm_ACH-002446 | chr19 | 54250535 | 54280381 | 22 | -0.214645 | 0 |
| 1261 | ibm_ACH-002446 | chr19 | 54280382 | 54587598 | 84 | -0.110817 | 0 |
| 1262 | ibm_ACH-002446 | chr19 | 54593994 | 54601069 | 9 | 1.345228 | + |
| 1263 | ibm_ACH-002446 | chr19 | 54630716 | 54906429 | 91 | 0.115034 | 0 |
| 1264 | ibm_ACH-002446 | chr19 | 54906430 | 55028114 | 35 | -0.355607 | - |
| 1265 | ibm_ACH-002446 | chr19 | 55031888 | 58572882 | 645 | -0.033537 | 0 |
| 1266 | ibm_ACH-002446 | chr20 | 87459 | 14685211 | 933 | -0.375277 | - |
| 1267 | ibm_ACH-002446 | chr20 | 15229689 | 26091794 | 545 | -0.782074 | - |
| 1268 | ibm_ACH-002446 | chr20 | 26103222 | 48748132 | 1966 | 0.206010 | + |
| 1269 | ibm_ACH-002446 | chr20 | 48921639 | 64273852 | 1233 | 0.694498 | + |
| 1270 | ibm_ACH-002446 | chr21 | 9068165 | 10650084 | 30 | -1.730240 | - |
| 1271 | ibm_ACH-002446 | chr21 | 13371245 | 46664624 | 1921 | -0.413457 | - |
| 1272 | ibm_ACH-002446 | chr22 | 15698411 | 50799394 | 4022 | -0.095532 | 0 |
| 1273 | ibm_ACH-002446 | chrX | 2781815 | 49317540 | 2166 | -0.906567 | - |
| 1274 | ibm_ACH-002446 | chrX | 49322943 | 49323511 | 1 | -29.450117 | - |
| 1275 | ibm_ACH-002446 | chrX | 49345546 | 155545528 | 4264 | -0.877244 | - |
1276 rows × 7 columns
# Compare row labels: previous release gene matrix (prev), new gene matrix
# (new1) and samples present in the new segment file (new2).
prev = set(
    tc.get(
        name='segmented-cn-wes-prioritzed-7fe1',
        version=27,
        file='wes.19Q3interim.gene',
    ).index.tolist()
)
new1 = set(genecn.index.tolist())
new2 = set(segmentcn['Sample'].tolist())
# Printed: |prev|, |prev & new1|, |new1|, |new1 & new2|
print(len(prev), len(prev & new1), len(new1), len(new1 & new2))
1695 1695 1702 7
# Quality checks on the new data with fixed thresholds: 750 for the segment
# check and 1.5 for the gene check.
# NOTE(review): the exact meaning of each threshold is defined in
# CCLE_postp_function — confirm there before changing the values.
checkAmountOfSegments(segmentcn,thresh = 750)
checkGeneChangeAccrossAll(genecn, thresh=1.5)
array([], dtype=object)
# Distinct sample ids present in the segment file (order is not meaningful),
# followed by a display of the segment table.
newsamples = list(set(segmentcn["Sample"]))
segmentcn
| Sample | CONTIG | START | END | NUM_POINTS_COPY_RATIO | MEAN_LOG2_COPY_RATIO | CALL | |
|---|---|---|---|---|---|---|---|
| 0 | ibm_ACH-001518 | chr1 | 785776 | 16456145 | 2069 | 0.066190 | 0 |
| 1 | ibm_ACH-001518 | chr1 | 16458591 | 16975982 | 61 | 0.569518 | + |
| 2 | ibm_ACH-001518 | chr1 | 16976250 | 101022017 | 7024 | 0.025315 | 0 |
| 3 | ibm_ACH-001518 | chr1 | 101025058 | 149487635 | 1672 | 0.252054 | + |
| 4 | ibm_ACH-001518 | chr1 | 149487762 | 149488437 | 1 | -3.705048 | - |
| 5 | ibm_ACH-001518 | chr1 | 149783752 | 248918615 | 8383 | 0.031559 | 0 |
| 6 | ibm_ACH-001518 | chr2 | 41357 | 47570583 | 2999 | 0.003066 | 0 |
| 7 | ibm_ACH-001518 | chr2 | 47678000 | 47783745 | 2 | -19.944018 | - |
| 8 | ibm_ACH-001518 | chr2 | 47790676 | 86847025 | 2131 | -0.013272 | 0 |
| 9 | ibm_ACH-001518 | chr2 | 86857806 | 87187492 | 3 | -4.853835 | - |
| 10 | ibm_ACH-001518 | chr2 | 87338261 | 88861558 | 64 | -0.115386 | 0 |
| 11 | ibm_ACH-001518 | chr2 | 88861725 | 90234982 | 54 | 0.579912 | + |
| 12 | ibm_ACH-001518 | chr2 | 91940935 | 178554334 | 4570 | -0.001669 | 0 |
| 13 | ibm_ACH-001518 | chr2 | 178554335 | 178680581 | 152 | 0.458965 | + |
| 14 | ibm_ACH-001518 | chr2 | 178680829 | 178834690 | 139 | -0.284919 | - |
| 15 | ibm_ACH-001518 | chr2 | 178836615 | 219515558 | 2806 | 0.001125 | 0 |
| 16 | ibm_ACH-001518 | chr2 | 219531507 | 221430403 | 89 | 0.696533 | + |
| 17 | ibm_ACH-001518 | chr2 | 221433891 | 242004734 | 1751 | 0.035405 | 0 |
| 18 | ibm_ACH-001518 | chr3 | 197398 | 198170102 | 11743 | 0.022799 | 0 |
| 19 | ibm_ACH-001518 | chr4 | 85481 | 165182806 | 6880 | 0.038329 | 0 |
| 20 | ibm_ACH-001518 | chr4 | 165207626 | 190082636 | 818 | -0.974435 | - |
| 21 | ibm_ACH-001518 | chr5 | 140057 | 36976654 | 1244 | 0.070916 | 0 |
| 22 | ibm_ACH-001518 | chr5 | 36984425 | 37065144 | 38 | -0.570981 | - |
| 23 | ibm_ACH-001518 | chr5 | 37107351 | 172968851 | 6503 | 0.025073 | 0 |
| 24 | ibm_ACH-001518 | chr5 | 172969162 | 175926991 | 72 | -0.954518 | - |
| 25 | ibm_ACH-001518 | chr5 | 175967691 | 181261065 | 753 | 0.066945 | 0 |
| 26 | ibm_ACH-001518 | chr6 | 203183 | 32005939 | 2092 | 0.039428 | 0 |
| 27 | ibm_ACH-001518 | chr6 | 32005940 | 32098456 | 67 | -0.511668 | - |
| 28 | ibm_ACH-001518 | chr6 | 32115488 | 170583999 | 7631 | 0.029312 | 0 |
| 29 | ibm_ACH-001518 | chr7 | 192950 | 142066081 | 7902 | 0.038898 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2296 | ibm_ACH-002069 | chr19 | 50876221 | 51627790 | 167 | 0.333163 | + |
| 2297 | ibm_ACH-002069 | chr19 | 51643104 | 51645656 | 4 | -29.707383 | - |
| 2298 | ibm_ACH-002069 | chr19 | 51692734 | 54219697 | 358 | 0.231685 | + |
| 2299 | ibm_ACH-002069 | chr19 | 54220373 | 54239341 | 9 | 1.368374 | + |
| 2300 | ibm_ACH-002069 | chr19 | 54239446 | 54455767 | 76 | 0.185947 | + |
| 2301 | ibm_ACH-002069 | chr19 | 54455768 | 54822110 | 106 | 0.477611 | + |
| 2302 | ibm_ACH-002069 | chr19 | 54824777 | 55181960 | 143 | 0.209391 | + |
| 2303 | ibm_ACH-002069 | chr19 | 55181961 | 58572882 | 565 | 0.317124 | + |
| 2304 | ibm_ACH-002069 | chr20 | 87459 | 64273852 | 4677 | 0.257016 | + |
| 2305 | ibm_ACH-002069 | chr21 | 9068165 | 10542698 | 9 | -0.820465 | - |
| 2306 | ibm_ACH-002069 | chr21 | 10543076 | 14643383 | 65 | 0.120287 | 0 |
| 2307 | ibm_ACH-002069 | chr21 | 14658498 | 25706340 | 123 | 0.810637 | + |
| 2308 | ibm_ACH-002069 | chr21 | 25709183 | 25709701 | 1 | -2.197575 | - |
| 2309 | ibm_ACH-002069 | chr21 | 25711971 | 29065357 | 126 | 0.880122 | + |
| 2310 | ibm_ACH-002069 | chr21 | 29066465 | 43797591 | 1106 | 0.465867 | + |
| 2311 | ibm_ACH-002069 | chr21 | 43797592 | 46664624 | 521 | -0.123504 | 0 |
| 2312 | ibm_ACH-002069 | chr22 | 15698411 | 22646638 | 723 | 0.110341 | 0 |
| 2313 | ibm_ACH-002069 | chr22 | 22646639 | 22901545 | 33 | 0.616361 | + |
| 2314 | ibm_ACH-002069 | chr22 | 22906092 | 38962354 | 1814 | 0.067907 | 0 |
| 2315 | ibm_ACH-002069 | chr22 | 38964362 | 38992738 | 8 | -22.977457 | - |
| 2316 | ibm_ACH-002069 | chr22 | 39014112 | 42128567 | 504 | -0.001952 | 0 |
| 2317 | ibm_ACH-002069 | chr22 | 42128568 | 42141260 | 5 | 0.413569 | + |
| 2318 | ibm_ACH-002069 | chr22 | 42141261 | 50507942 | 823 | 0.075072 | 0 |
| 2319 | ibm_ACH-002069 | chr22 | 50515471 | 50799394 | 112 | -1.193511 | - |
| 2320 | ibm_ACH-002069 | chrX | 2781815 | 8170446 | 112 | -0.926466 | - |
| 2321 | ibm_ACH-002069 | chrX | 8465649 | 8533305 | 2 | -3.840768 | - |
| 2322 | ibm_ACH-002069 | chrX | 8534068 | 14920061 | 280 | -1.034343 | - |
| 2323 | ibm_ACH-002069 | chrX | 15244255 | 79172261 | 3074 | -0.027348 | 0 |
| 2324 | ibm_ACH-002069 | chrX | 79360838 | 85092813 | 114 | -0.580956 | - |
| 2325 | ibm_ACH-002069 | chrX | 85092935 | 155545528 | 2849 | -0.971653 | - |
2326 rows × 7 columns
# Samples that belong to the release sample set in the reference workspace.
newsamples = refwm.get_sample_sets().loc[sample_set_id].samples
samples = refwm.get_samples()
# Bucket paths of the modeled-segments plot for each of those samples.
is_new = samples.index.isin(newsamples)
plots = samples.loc[is_new, "modeled_segments_plot_tumor"].values
for plot in plots:
! gsutil cp $plot temp/
for plot in plots:
display(Image('temp/'+plot.split('/')[-1]))
# Publish the unfiltered gene-level matrix and segment table as a new version
# of the taiga dataset.
tc.update_dataset(
    dataset_permaname="segmented-cn-wes-prioritzed-7fe1",
    dataset_description="updating to " + sample_set_id,
    upload_file_path_dict={
        'temp/wes.' + sample_set_id + '.gene.cn': 'NumericMatrixCSV',
        'temp/wes.' + sample_set_id + '.segmented.cn': 'TableCSV',
    },
    force_remove=True,
)
Uploading wes.19Q3.gene...
---------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-180-343c29b9514b> in <module> 3 'temp/wes.'+sample_set_id+'.segmented.cn': 'TableCSV'}, 4 dataset_description="updating to "+sample_set_id, ----> 5 force_remove=True 6 ) /anaconda3/envs/py36/lib/python3.6/site-packages/taigapy/__init__.py in update_dataset(self, dataset_id, dataset_permaname, dataset_version, dataset_description, upload_file_path_dict, force_keep, force_remove) 659 keep_datafile_id_list = [] 660 --> 661 new_session_id = self.upload_session_files(upload_file_path_dict=upload_file_path_dict) 662 663 new_dataset_version_params = dict() /anaconda3/envs/py36/lib/python3.6/site-packages/taigapy/__init__.py in upload_session_files(self, upload_file_path_dict) 499 500 s3_client.upload_file(upload_file_path, bucket, --> 501 upload_file_object.prefix_and_file_name) 502 503 S3UploadedData = s3_client.get_object(Bucket=bucket, Key=upload_file_object.prefix_and_file_name) /anaconda3/envs/py36/lib/python3.6/site-packages/boto3/s3/inject.py in upload_file(self, Filename, Bucket, Key, ExtraArgs, Callback, Config) 129 return transfer.upload_file( 130 filename=Filename, bucket=Bucket, key=Key, --> 131 extra_args=ExtraArgs, callback=Callback) 132 133 /anaconda3/envs/py36/lib/python3.6/site-packages/boto3/s3/transfer.py in upload_file(self, filename, bucket, key, callback, extra_args) 277 filename, bucket, key, extra_args, subscribers) 278 try: --> 279 future.result() 280 # If a client error was raised, add the backwards compatibility layer 281 # that raises a S3UploadFailedError. 
These specific errors were only /anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self) 107 except KeyboardInterrupt as e: 108 self.cancel() --> 109 raise e 110 111 def cancel(self): /anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self) 104 # however if a KeyboardInterrupt is raised we want want to exit 105 # out of this and propogate the exception. --> 106 return self._coordinator.result() 107 except KeyboardInterrupt as e: 108 self.cancel() /anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self) 258 # possible value integer value, which is on the scale of billions of 259 # years... --> 260 self._done_event.wait(MAXINT) 261 262 # Once done waiting, raise an exception if present or return the /anaconda3/envs/py36/lib/python3.6/threading.py in wait(self, timeout) 549 signaled = self._flag 550 if not signaled: --> 551 signaled = self._cond.wait(timeout) 552 return signaled 553 /anaconda3/envs/py36/lib/python3.6/threading.py in wait(self, timeout) 293 try: # restore state no matter what (e.g., KeyboardInterrupt) 294 if timeout is None: --> 295 waiter.acquire() 296 gotit = True 297 else: KeyboardInterrupt: